# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup

from runtimeError import runtimeError


def analyse_html(html_path):
    """Extract the header texts and data rows of the first table in an HTML file.

    The file is read as bytes and parsed with BeautifulSoup's ``html.parser``.
    Only the first ``<table>`` element is inspected; it must carry an ``id``
    attribute and contain ``<thead>``/``<tbody>`` sections.

    Header texts come from the ``<th>`` cells of the first header row. If the
    first ``<th>`` has no single string child, the texts of all
    ``div.DataTables_sort_wrapper`` elements in the document are used instead.

    Row handling depends on the table id:

    * ``"prr"``: every ``<tr>`` yields one row made of its ``<td>`` strings.
    * anything else: a ``<tr>`` whose first ``<td>`` has at least one
      attribute is buffered rather than emitted. The next ``<tr>`` whose first
      ``<td>`` has no attributes is appended to that buffer and the combined
      row is emitted. If nothing is buffered, the row is left-padded with 8
      empty strings instead. (Presumably the buffered cells are row-spanning
      "parent" cells -- TODO confirm against the report format.)

    Args:
        html_path: Path of the HTML file to parse.

    Returns:
        A ``(columnArray, elementArray)`` tuple: the list of header texts and
        the list of rows, each row being a list of cell values. A cell value is
        whatever ``Tag.string`` returns (possibly ``None``), or a tab-terminated
        concatenation of ``<li>`` texts for cells that contain a list.

    Raises:
        runtimeError: If anything goes wrong while reading or parsing the file.
            The original exception is attached as ``__cause__``.
    """
    try:
        columnArray = []
        elementArray = []
        with open(html_path, 'rb') as file:
            html = file.read()
        # Parsing does not need the file handle, so it happens after the
        # ``with`` block has closed it.
        bs = BeautifulSoup(html, "html.parser")
        table = bs.table
        tableId = table["id"]
        tHead = table.thead.tr.find_all(name="th")
        tBody = table.tbody.find_all(name="tr")
        if tHead[0].string is None:
            # The header cells wrap their caption in extra markup; fall back
            # to the DataTables sort wrappers to obtain the captions.
            tHead = bs.select("div.DataTables_sort_wrapper")
        # Build the header row.
        columnArray = [th.text for th in tHead]

        if tableId == "prr":
            elementArray = [
                [cell.string for cell in tr.find_all("td")] for tr in tBody
            ]
        else:
            # Number of empty cells placed in front of a row that has no
            # buffered leading cells (value taken over unchanged).
            padWidth = 8
            pendingRow = []
            for tr in tBody:
                cells = tr.find_all("td")
                if cells[0].attrs:
                    # Row with an attributed first cell: buffer its cells and
                    # wait for the following attribute-less row.
                    for cell in cells:
                        if cell.string is not None:
                            pendingRow.append(cell.string)
                            continue
                        parts = []
                        for item in cell.select("ul li"):
                            text = item.string
                            if text is None:
                                # Empty or nested <li>: .string is None, so
                                # use the concatenated text instead of
                                # failing on None + str.
                                text = item.get_text()
                            parts.append(text + "\t")
                        pendingRow.append("".join(parts))
                    continue
                if not pendingRow:
                    pendingRow = [""] * padWidth
                pendingRow.extend(cell.string for cell in cells)
                elementArray.append(pendingRow)
                pendingRow = []
            if pendingRow:
                # Buffered cells that were never completed by a follow-up row.
                elementArray.append(pendingRow)
        return columnArray, elementArray
    except Exception as e:
        # e.args may be empty or start with a non-string (e.g. the errno of an
        # OSError); concatenating that directly would raise a new error and
        # hide the real cause.
        if e.args and isinstance(e.args[0], str):
            detail = e.args[0]
        else:
            detail = str(e)
        raise runtimeError("解析html文件遇到问题。具体：" + detail) from e
